{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Update to the langgraph lab4:\n",
    "- Includes handling for cookie pop ups (both dialogs and html).  Will not work 100% of the time as depends on button naming.\n",
    "- Includes the openai reasoning mini model (o4-mini) as the worker.  I found that gpt-4.1-mini (or gpt-40-mini) struggled with the right searches to use in some cases.\n",
    "- Updated the worker prompt to push for tool usage over training data.\n",
    "- Includes Claude as the evaluator agent\n",
    "- Update to the evaluator agent system prompt as Claude was being very pedantic and rejecting everything!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Annotated, TypedDict, List, Dict, Any, Optional\n",
    "from langchain_core.messages import AIMessage, HumanMessage, SystemMessage\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain_community.agent_toolkits import PlayWrightBrowserToolkit\n",
    "from langchain_community.tools.playwright.utils import create_async_playwright_browser\n",
    "from langgraph.graph import StateGraph, START, END\n",
    "from langgraph.checkpoint.memory import MemorySaver\n",
    "from langgraph.prebuilt import ToolNode\n",
    "from langgraph.graph.message import add_messages\n",
    "from pydantic import BaseModel, Field\n",
    "from IPython.display import Image, display\n",
    "import gradio as gr\n",
    "import uuid\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "from langchain_anthropic import ChatAnthropic\n",
    "import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "load_dotenv(override=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First define a structured output\n",
    "\n",
    "class EvaluatorOutput(BaseModel):\n",
    "    feedback: str = Field(description=\"Feedback on the assistant's response\")\n",
    "    success_criteria_met: bool = Field(description=\"Whether the success criteria have been met\")\n",
    "    user_input_needed: bool = Field(description=\"True if more input is needed from the user, or clarifications, or the assistant is stuck\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The state\n",
    "\n",
    "class State(TypedDict):\n",
    "    messages: Annotated[List[Any], add_messages]\n",
    "    success_criteria: str\n",
    "    feedback_on_work: Optional[str]\n",
    "    success_criteria_met: bool\n",
    "    user_input_needed: bool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "nest_asyncio.apply()\n",
    "\n",
    "# Launch browser\n",
    "async_browser = create_async_playwright_browser(headless=False)\n",
    "context = async_browser.contexts[0] if async_browser.contexts else await async_browser.new_context()\n",
    "\n",
    "# Handle dialog popups\n",
    "async def handle_dialog(dialog):\n",
    "    await dialog.accept()\n",
    "\n",
    "# Handle cookie banners / popups\n",
    "async def handle_page(page):\n",
    "    page.on(\"dialog\", handle_dialog)\n",
    "    await page.wait_for_load_state(\"networkidle\")\n",
    "\n",
    "    consent_texts = [\"accept\", \"accept all\", \"accept all cookies\",\"allow\", \"agree\", \"ok\", \"got it\",\"consent\"]\n",
    "\n",
    "    for text in consent_texts:\n",
    "        try:\n",
    "            locator = page.locator(f\"button:has-text('{text}')\")\n",
    "            if await locator.count() > 0:\n",
    "                await locator.first.click(timeout=2000)\n",
    "                break\n",
    "        except:\n",
    "            continue\n",
    "\n",
    "# Attach the page handler to every new page\n",
    "context.on(\"page\", handle_page)\n",
    "\n",
    "# Toolkit setup\n",
    "toolkit = PlayWrightBrowserToolkit.from_browser(async_browser=async_browser)\n",
    "tools = toolkit.get_tools()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the LLMs\n",
    "\n",
    "gpt_mini = ChatOpenAI(model=\"gpt-4.1-mini\")\n",
    "gpt_reasoning_mini = ChatOpenAI(model=\"o4-mini\")\n",
    "claude = ChatAnthropic(model=\"claude-3-5-sonnet-20241022\")\n",
    "\n",
    "#assign models\n",
    "worker_llm = gpt_reasoning_mini\n",
    "evaluator_llm = claude\n",
    "\n",
    "#bind tools\n",
    "worker_llm_with_tools = worker_llm.bind_tools(tools)\n",
    "evaluator_llm_with_output = evaluator_llm.with_structured_output(EvaluatorOutput)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The worker node\n",
    "\n",
    "def worker(state: State) -> Dict[str, Any]:\n",
    "    system_message = f\"\"\"You are a helpful assistant that can use tools to complete tasks.  \n",
    "If you already have the answer in your training data, you should still use the tools to validate against the most up to date information.\n",
    "You keep working on a task until either you have a question or clarification for the user, or the success criteria is met.\n",
    "This is the success criteria:\n",
    "{state['success_criteria']}\n",
    "You should reply either with a question for the user about this assignment, or with your final response.\n",
    "If you have a question for the user, you need to reply by clearly stating your question. An example might be:\n",
    "\n",
    "Question: please clarify whether you want a summary or a detailed answer\n",
    "\n",
    "If you've finished, reply with the final answer, and don't ask a question; simply reply with the answer.\n",
    "\"\"\"\n",
    "    \n",
    "    if state.get(\"feedback_on_work\"):\n",
    "        system_message += f\"\"\"\n",
    "Previously you thought you completed the assignment, but your reply was rejected because the success criteria was not met.\n",
    "Here is the feedback on why this was rejected:\n",
    "{state['feedback_on_work']}\n",
    "With this feedback, please continue the assignment, ensuring that you meet the success criteria or have a question for the user.\"\"\"\n",
    "    \n",
    "    # Add in the system message\n",
    "\n",
    "    found_system_message = False\n",
    "    messages = state[\"messages\"]\n",
    "    for message in messages:\n",
    "        if isinstance(message, SystemMessage):\n",
    "            message.content = system_message\n",
    "            found_system_message = True\n",
    "    \n",
    "    if not found_system_message:\n",
    "        messages = [SystemMessage(content=system_message)] + messages\n",
    "    \n",
    "    # Invoke the LLM with tools\n",
    "    response = worker_llm_with_tools.invoke(messages)\n",
    "    \n",
    "    # Return updated state\n",
    "    return {\n",
    "        \"messages\": [response],\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def worker_router(state: State) -> str:\n",
    "    last_message = state[\"messages\"][-1]\n",
    "    \n",
    "    if hasattr(last_message, \"tool_calls\") and last_message.tool_calls:\n",
    "        return \"tools\"\n",
    "    else:\n",
    "        return \"evaluator\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_conversation(messages: List[Any]) -> str:\n",
    "    conversation = \"Conversation history:\\n\\n\"\n",
    "    for message in messages:\n",
    "        if isinstance(message, HumanMessage):\n",
    "            conversation += f\"User: {message.content}\\n\"\n",
    "        elif isinstance(message, AIMessage):\n",
    "            text = message.content or \"[Tools use]\"\n",
    "            conversation += f\"Assistant: {text}\\n\"\n",
    "    return conversation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluator(state: State) -> State:\n",
    "    last_response = state[\"messages\"][-1].content\n",
    "\n",
    "    system_message = f\"\"\"You are an evaluator that determines if a task has been completed successfully by an Assistant.\n",
    "Assess the Assistant's last response based on the given criteria. Respond with your feedback, and with your decision on whether the success criteria has been met,\n",
    "and whether more input is needed from the user.  \n",
    "Important context:\n",
    "1. Your role is to assess if the response reasonably meets the success criteria.\n",
    "2. You do not need to reperform the task in your evaluation. \n",
    "3. You do not need evidence of the steps followed by the assistant.   \n",
    "4. The current date is: {datetime.date.today()}\"\"\"\n",
    "    \n",
    "    user_message = f\"\"\"You are evaluating a conversation between the User and Assistant. You decide what action to take based on the last response from the Assistant.\n",
    "\n",
    "The entire conversation with the assistant, with the user's original request and all replies, is:\n",
    "{format_conversation(state['messages'])}\n",
    "\n",
    "The success criteria for this assignment is:\n",
    "{state['success_criteria']}\n",
    "\n",
    "And the final response from the Assistant that you are evaluating is:\n",
    "{last_response}\n",
    "\n",
    "Respond with your feedback, and decide if the success criteria is met by this response.\n",
    "Also, decide if more user input is required, either because the assistant has a question, needs clarification, or seems to be stuck and unable to answer without help.\n",
    "\"\"\"\n",
    "    if state[\"feedback_on_work\"]:\n",
    "        user_message += f\"Also, note that in a prior attempt from the Assistant, you provided this feedback: {state['feedback_on_work']}\\n\"\n",
    "        user_message += \"If you're seeing the Assistant repeating the same mistakes, then consider responding that user input is required.\"\n",
    "    \n",
    "    evaluator_messages = [SystemMessage(content=system_message), HumanMessage(content=user_message)]\n",
    "\n",
    "    eval_result = evaluator_llm_with_output.invoke(evaluator_messages)\n",
    "    new_state = {\n",
    "        \"messages\": [{\"role\": \"assistant\", \"content\": f\"Evaluator Feedback on this answer: {eval_result.feedback}\"}],\n",
    "        \"feedback_on_work\": eval_result.feedback,\n",
    "        \"success_criteria_met\": eval_result.success_criteria_met,\n",
    "        \"user_input_needed\": eval_result.user_input_needed\n",
    "    }\n",
    "    return new_state"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def route_based_on_evaluation(state: State) -> str:\n",
    "    if state[\"success_criteria_met\"] or state[\"user_input_needed\"]:\n",
    "        return \"END\"\n",
    "        #print(\"END\")\n",
    "    else:\n",
    "        return \"worker\"\n",
    "        #print(\"worker\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up Graph Builder with State\n",
    "graph_builder = StateGraph(State)\n",
    "\n",
    "# Add nodes\n",
    "graph_builder.add_node(\"worker\", worker)\n",
    "graph_builder.add_node(\"tools\", ToolNode(tools=tools))\n",
    "graph_builder.add_node(\"evaluator\", evaluator)\n",
    "\n",
    "# Add edges\n",
    "graph_builder.add_conditional_edges(\"worker\", worker_router, {\"tools\": \"tools\", \"evaluator\": \"evaluator\"})\n",
    "graph_builder.add_edge(\"tools\", \"worker\")\n",
    "graph_builder.add_conditional_edges(\"evaluator\", route_based_on_evaluation, {\"worker\": \"worker\", \"END\": END})\n",
    "graph_builder.add_edge(START, \"worker\")\n",
    "\n",
    "# Compile the graph\n",
    "memory = MemorySaver()\n",
    "graph = graph_builder.compile(checkpointer=memory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(Image(graph.get_graph().draw_mermaid_png()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_thread_id() -> str:\n",
    "    return str(uuid.uuid4())\n",
    "\n",
    "\n",
    "async def process_message(message, success_criteria, history, thread):\n",
    "\n",
    "    config = {\"configurable\": {\"thread_id\": thread}}\n",
    "\n",
    "    state = {\n",
    "        \"messages\": message,\n",
    "        \"success_criteria\": success_criteria,\n",
    "        \"feedback_on_work\": None,\n",
    "        \"success_criteria_met\": False,\n",
    "        \"user_input_needed\": False\n",
    "    }\n",
    "    result = await graph.ainvoke(state, config=config)\n",
    "    user = {\"role\": \"user\", \"content\": message}\n",
    "    reply = {\"role\": \"assistant\", \"content\": result[\"messages\"][-2].content}\n",
    "    feedback = {\"role\": \"assistant\", \"content\": result[\"messages\"][-1].content}\n",
    "    return history + [user, reply, feedback]\n",
    "\n",
    "async def reset():\n",
    "    return \"\", \"\", None, make_thread_id()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "with gr.Blocks(theme=gr.themes.Default(primary_hue=\"emerald\")) as demo:\n",
    "    gr.Markdown(\"## Sidekick Personal Co-worker\")\n",
    "    thread = gr.State(make_thread_id())\n",
    "    \n",
    "    with gr.Row():\n",
    "        chatbot = gr.Chatbot(label=\"Sidekick\", height=300, type=\"messages\")\n",
    "    with gr.Group():\n",
    "        with gr.Row():\n",
    "            message = gr.Textbox(show_label=False, placeholder=\"Your request to your sidekick\")\n",
    "        with gr.Row():\n",
    "            success_criteria = gr.Textbox(show_label=False, placeholder=\"What are your success critiera?\")\n",
    "    with gr.Row():\n",
    "        reset_button = gr.Button(\"Reset\", variant=\"stop\")\n",
    "        go_button = gr.Button(\"Go!\", variant=\"primary\")\n",
    "    message.submit(process_message, [message, success_criteria, chatbot, thread], [chatbot])\n",
    "    success_criteria.submit(process_message, [message, success_criteria, chatbot, thread], [chatbot])\n",
    "    go_button.click(process_message, [message, success_criteria, chatbot, thread], [chatbot])\n",
    "    reset_button.click(reset, [], [message, success_criteria, chatbot, thread])\n",
    "\n",
    "    \n",
    "demo.launch()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
